#!/usr/bin/env python2.4
import os, urllib, time

# Categories to download, in the order they are processed. A category is
# only fetched if pages.list contains a matching "--Name--" section header.
CATEGORIES = ("General", "People", "Objects", "Dates")

# Parse pages.list into `categories`: a dict mapping each category name to
# the list of page names found under it.
#
# File format: a header line of the form "--Category--" opens a section;
# every following non-blank line is a page name belonging to that section.
# A header that appears more than once keeps accumulating into the same list.
fin = open("pages.list", 'r')
categories = {}
current = None
try:
  for line in fin:
    line = line.strip()
    if not line:
      # Blank lines are separators, not page names; storing them would
      # later produce a request for an empty page title.
      continue
    if line[:2] == '--' and line[-2:] == '--':
      current = line[2:-2]
      if current not in categories:
        categories[current] = []
    elif current is None:
      # A page listed before any header has no category to file it under;
      # report it and carry on instead of failing with KeyError.
      print("Ignoring page listed before any category header: %s" % line)
    else:
      categories[current].append(line)
finally:
  # Release the file handle even if reading fails part-way through.
  fin.close()

# Download every listed page, category by category in CATEGORIES order, and
# save the HTML to raw/<page> preceded by a one-line category tag.
for cat in CATEGORIES:
  if cat not in categories:
    # pages.list has no section for this category; nothing to fetch.
    continue
  for page in categories[cat]:
    print("Retrieving page %s ..." % page)
    url = "http://simple.wikipedia.org/wiki/%s" % (page)
    httpin = urllib.urlopen(url)
    try:
      html = httpin.read()
    finally:
      # Close the connection even if the read fails.
      httpin.close()
    if "ERR_ACCESS_DENIED" in html:
      # The response is an error page rather than the article; do not save
      # it under raw/ as if it were real content.
      print("  ...access denied :(")
    else:
      fout = open(os.path.join('raw', page), 'w')
      try:
        # NOTE(review): 'CATEOGRY' is misspelled, but the tag is kept
        # byte-for-byte because whatever reads these files may match on
        # it -- confirm with the consumer before correcting.
        fout.write('CATEOGRY:' + cat + '\n')
        fout.write(html)
      finally:
        fout.close()
    # Pause between requests (presumably to avoid hammering the server).
    time.sleep(2)
